################################################################################
# Replication code for "Knowing is Disturbing": Online appendices      
# Date: 2025-09-29                                            
# Author: Danqi Guo & Genia Kostka                            
################################################################################

### clear history
rm(list = ls())

## set working directory to the folder where the data and code are stored
# setwd("your working directory")

# Load library, if not installed yet, please install them first using install.packages("package name")
library(tidyverse)
library(vtable)
library(factoextra)
library(sjPlot)
library(coefplot)
library(mediation)
library(grf)
library(estimatr)
library(patchwork)
library(knitr)
library(kableExtra)

# load preprocessed data 
data<-read.csv("data_cleaned_rep.csv",sep=",")

################################################################################
##  Appendix A Additional Details about Data Collection and Sample
# ##############################################################################

# Table A2.1. Summary statistics of the survey sample
labs_tab_a2.1<-c("Age","Gender","Education","Ethnic minority","Monthly income","Rural","CCP membership",
                 "Overseas experience (Yes)","Political ideology: nationalism","Political ideology: liberalism","VPN Savvy","Conservation",
                 "Big Five: openness","Big Five: conscientiousness","Big Five: neuroticism",
                 "Big Five: extraversion","Big Five: agreeableness","Life Satisfaction")

data%>%dplyr::select(Age,Gender,Education,Ethnicity,MonthlyIncome,rural,CCPMembership,OverseaExperience_yes,
                     nationalism_nor,liberalism_nor,vpnsavvy,conservation_nor,openness_nor,
                     conscientiousness_nor,neuroticism_nor,extraversion_nor,agreeableness_nor,LifeSat_1)%>%
  sumtable(summ=c('mean(x)','sd(x)','min(x)','max(x)'),labels=labs_tab_a2.1,title="",
  note="Note: Sample size N= 4,507. Political ideology, VPN savvy, conservation, and the Big Five personality 
  traits are normalized.",
           numformat = formatfunc(digits = 2, nsmall = 2))


## Table A2.2. Comparison of survey sample and national population distribution
## Note: the results reproduces the first column of Table A2.2, the statistics in
## the second column was manually entered based the sources in the notation. 

### convert variables into factor variables for gender, education, ethnic minority, ccp_member, rural residency
### gender
data$Gender_fac<-as.factor(data$Gender)
### education
data$education_fac <- factor(data$Education, 
                             levels = c( 1,2,3,4,5),
                             labels = c("Primary or less", "Junior secondary", "Senior seconday", "Undergraduate", "Postgraduate"))
### ethinic minority
data$ethnicity_minority <-factor(data$Ethnicity,
                                 levels =c(0,1),
                                 labels=c("No","Yes"))
### ccp membership
data$ccp_member<-factor(data$CCPMembership,
                        levels=c(0,1),
                        labels=c("No","Yes"))
### rural residency
data$rural_fac<-factor(data$rural,
                       levels=c(0,1),
                       labels=c("No","Yes"))

### four region in China according to the Chinese national bureau of statistics
table(data$Province)
data$region_4<-case_when(data$Province %in% c("北京","天津","河北","山东","江苏","上海","浙江","广东","福建","海南")~"eastern",
                      data$Province %in% c("河南","山西","安徽","江西","湖南","湖北")~"central",
                      data$Province %in% c("陕西","内蒙古","广西","重庆","新疆","宁夏","青海",
                                           "甘肃","四川","西藏","贵州","云南")~"western",
                      data$Province %in% c("吉林","辽宁","黑龙江")~"northeastern")

class(data$region_4)
data%>%dplyr::select(age2,Gender_fac,region_4,education_fac,rural_fac,ethnicity_minority,ccp_member)%>%
  sumtable()

################################################################################
#  Appendix B Information about survey experiment
################################################################################

#  Table B1. Balance Table (Randomization check)      
labs_tab_b1<-c("Age","Gender","Education","Ethnic minority","Monthly income","Rural","CCP membership",
               "Overseas experience (Yes)","Political ideology: nationalism","Political ideology: liberalism","VPN Savvy","Conservation",
               "Big Five: openness","Big Five: conscientiousness","Big Five: neuroticism",
               "Big Five: extraversion","Big Five: agreeableness","Life Satisfaction")

data%>%dplyr::select(condition,Age,Gender,Education,Ethnicity,MonthlyIncome,rural,CCPMembership,OverseaExperience_yes,
                     nationalism_nor,liberalism_nor,vpnsavvy,conservation_nor,openness_nor,
                     conscientiousness_nor,neuroticism_nor,extraversion_nor,agreeableness_nor,LifeSat_1)%>%
  sumtable(group="condition", group.test=list(format = '{pval}'),title="",digits=2,
           summ=c('mean(x)','sd(x)'),labels=labs_tab_b1,
           note="Note: Statistical significance markers: * p<0.1; ** p<0.05; *** p<0.01. 
           This table indicates successful randomization, with sociodemographic attributes 
           and personality characteristics evenly distributed across the nine experimental conditions.")


#################################################################################
#  Appendix C Principal Component Analysis (PCA) of Emotion
#################################################################################

# Table C1. PCA Loadings and Explained Variance of Emotional Responses
df_emotion<- data[ , c('Sadness','Anger',"Fear","Disgust","DoNotCare","Helplessness",
                       "Secured","Happiness","Curiosity","Surprised")] 

# check out correlation matrix 
cor_emotion<-cor(df_emotion)
mean(cor(df_emotion))
# run PCA using base R function princomp()
pca_emotion<-princomp(df_emotion) 
# information output
names(pca_emotion)
summary(pca_emotion,loadings=TRUE) # provided accurate variance but no eigenvalue (only sd)
#extract loadings and scores
pca_emotion$loadings 
pca_emotion_loadings<-round(loadings(pca_emotion)[],3)
head(pca_emotion$scores,5)
#calculate eigenvalues, proportion of variance and cumulative variance
eigenvalues<-pca_emotion$sdev^2
proportion_variance <- eigenvalues / sum(eigenvalues)
cumulative_variance <- cumsum(proportion_variance)
# create a dataframe with eigenvalues, proportion of variance and cumulative variance
# Extract loadings and convert to a matrix
loadings_matrix <- unclass(pca_emotion$loadings)

# Combine into one matrix
tab_c1 <- rbind(
  loadings_matrix,
  Eigenvalue = eigenvalues,
  Proportion = proportion_variance,
  Cumulative = cumulative_variance
)
# Round values for neat display
tab_c1 <- round(tab_c1, 3)
print(tab_c1)

# Figure C2. Scree plot of eigenvalues
fig_c2<-
  fviz_eig(pca_emotion,choice="eigenvalue",geom="line",ggtheme = theme_bw())+
  geom_hline(yintercept=1, linetype="dashed", color = "red")

# Figure C3. Scree plot of variance explained
fig_c3<-
  fviz_eig(pca_emotion,choice="variance",ggtheme = theme_bw())

# Figure C4.1. PCA variable plot Dim 1 and Dim 2
fig_c4.1<-fviz_pca_var(pca_emotion,
                                   col.var = "contrib", # Color by contributions to the PC
                                   gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
                                   repel = TRUE,
                                   ggtheme=theme_bw()) # Avoid text overlapping

# Figure C4.2. PCA variable plot Dim 2 and Dim 3
fig_c4.2<-fviz_pca_var(pca_emotion,axes = c(2, 3),
                                   col.var = "contrib", # Color by contributions to the PC
                                   gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
                                   repel = TRUE,
                                   ggtheme=theme_bw())

## create emotion variables based on PCA scores
## we take three component comp.3 as suggested...
pc_emotion = pca_emotion$scores
cor(pc_emotion) ### 5 components are independent from each other.
df_components_emotion<-data.frame(pc_emotion)
# correlation matrix of the components and the original variable
round(cor(df_emotion,pca_emotion$scores),3)
data$negative_emo<-pc_emotion[,"Comp.1"]
data$neutral_emo<-pc_emotion[,"Comp.3"]
data$positive_emo<-pc_emotion[,"Comp.2"]

## given the negative pca loading for positive and neutral emotoin, we reverse the pc of both.
positive_emo.df<-data.frame(data$positive_emo)
positive_emo.df<-positive_emo.df*(-1) # reverse positive emotion pca score by multiplying -1 
data$positive_emo_rv<-positive_emo.df$data.positive_emo
neutral_emo.df<-data.frame(data$neutral_emo)
neutral_emo.df<-neutral_emo.df*(-1) # reverse neutral emotion pca score by multiplying -1
data$neutral_emo_rv<-neutral_emo.df$data.neutral_emo

################################################################################
#  Appendix D Treatment effect 
################################################################################

## Calculate two different attitudinal outcomes by aggregating attitude items towards the fictional and 
## real-life scenario respectively.

data$general_attitude_fic<-rowSums(data[,c("Attitude.I_1","Attitude.I_2","Attitude.I_3","Attitude.I_4")])
data$general_attitude_real<-rowSums(data[,c("Attitude.II_1","Attitude.II_2","Attitude.II_3","Attitude.II_4")])

# Table D1. Average treatment effect on emotions and public attitudes 
# note: this table corresponds to Figure 2 in the main text.
mod_positive<-lm(positive_emo_rv~condition,data=data)
mod_negative<-lm(negative_emo~condition,data=data)
mod_neutral<-lm(neutral_emo_rv~condition,data=data)
mod_attitude_fic<-lm(general_attitude_fic~condition,data=data)
mod_attitude_real<-lm(general_attitude_real~condition,data=data)

tab_d1<-tab_model(mod_positive,mod_negative,mod_neutral,mod_attitude_fic,mod_attitude_real,
          digits=3,
          dv.labels = c("Positive Emotion","Negative Emotion","Neutral Emotion",
                        "Attitude towards fictional scenario","Attitude towards real-life scenario"),
          p.style = "stars",show.ci=FALSE,show.se = TRUE,collapse.se = TRUE)

# Table D2. Treatment effect on emotions and public attitudes: without control 
##convert two treatment dimensions (surveillance and censorship) into factor 

data$surveillance_fac <- factor(data$Surveillance, 
                                levels = c(0, 1, 2),
                                labels = c("No Mention",  "Personalized","Public"))
data$censorship_fac <- factor(data$Censorship, 
                              levels = c(0, 1, 2),
                              labels = c("No Mention", "Personalized", "Public"))

# Function to run both models (with and without interaction) for each outcome variable
run_decomposed_models <- function(outcome, data) {
  form_no_int <- as.formula(paste(outcome, "~ surveillance_fac + censorship_fac"))
  form_int    <- as.formula(paste(outcome, "~ surveillance_fac * censorship_fac"))
  
  list(
    no_interaction = lm_robust(form_no_int, data = data),
    interaction    = lm_robust(form_int,    data = data)
  )
}

# List of outcomes you want models for
outcomes <- c(
  "positive_emo_rv",
  "negative_emo",
  "neutral_emo_rv",
  "general_attitude_fic",
  "general_attitude_real"
)

# Run models for each outcome
models <- lapply(outcomes, run_decomposed_models, data = data)
names(models) <- outcomes

# Flatten the nested list into a single list of models
all_models <- unlist(models, recursive = FALSE)

### models without control, with and without interaction Table D2 
tab_d2<-tab_model(all_models$positive_emo_rv.no_interaction,all_models$positive_emo_rv.interaction,
          all_models$negative_emo.no_interaction,all_models$negative_emo.interaction,
          all_models$neutral_emo_rv.no_interaction,all_models$neutral_emo_rv.interaction,
          all_models$general_attitude_fic.no_interaction,all_models$general_attitude_fic.interaction,
          all_models$general_attitude_real.no_interaction,all_models$general_attitude_real.interaction,
          dv.labels = c("Positive Emotion \n (main)","Positive Emotion \n (main+interactions)",
                        "Negative Emotion \n (main)","Negative Emotion \n (main+interactions)",
                        "Neutral Emotion \n (main)","Neutral Emotion b\n (main+interactions)",
                        "Attitude towards fictitious scenario \n (main)", "Attitude towards fictitious scenario \n (main+interactions)",
                        "Attitude towards real-life scenario \n (main)", "Attitude towards real-life scenario \n (main+interactions)"),
          digits=3, p.style = "stars",show.ci=FALSE,show.se = TRUE,collapse.se = TRUE)


# Figure D3. Average treatment effect on discrete emotions 

# regressing discrete emotions 
sadness<-lm(Sadness~condition,data=data)
fear<-lm(Fear~condition,data=data)
anger<-lm(Anger~condition,data=data)
disgust<-lm(Disgust~condition,data=data)
helplessness<-lm(Helplessness~condition,data=data)
indifference<-lm(DoNotCare~condition,data=data)
happiness<-lm(Happiness~condition,data=data)
security<-lm(Secured~condition,data=data)
curiosity<-lm(Curiosity~condition,data=data)
surprise<-lm(Surprised~condition,data=data)

# Figure D3.1 Average treatment effect on discrete emotions (positive)
conditionlab<-c("Personal\nBoth","Personal\nCensorship","Personal Censorship&\nPublic Surveillance",
                "Personal\nSurveillance","Personal Surveillance&\nPublic Censorship",
                "Public\nBoth","Public\nCensorship", "Public\nSurveillance")

fig_d3.1<-multiplot(happiness,security,title="",
                    horizontal = TRUE,intercept = FALSE, pointSize = 2,dodgeHeight = 0.6,textAngle = 1)+
  scale_y_discrete(labels= conditionlab)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
  ylab("condition")+ 
  xlab("Treatment effect on discrete emotions")+
  theme(legend.title=element_blank())

# Figure D3.2 Average treatment effect on discrete emotions (negative)
fig_d3.2<-multiplot(sadness,fear,anger,disgust,helplessness,title="",
                   horizontal = TRUE,intercept = FALSE, pointSize = 2,dodgeHeight = 0.6,textAngle = 1)+
  scale_y_discrete(labels= conditionlab)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
  ylab("condition")+ 
  xlab("Treatment effect on discrete emotions")+
  theme(legend.title=element_blank())

# Figure D3.3 Average treatment effect on discrete emotions (neutral)
fig_d3.3<-multiplot(indifference,curiosity,surprise,title="",
                    horizontal = TRUE,intercept = FALSE, pointSize = 2,dodgeHeight = 0.6,textAngle = 1)+
  scale_y_discrete(labels= conditionlab)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
  ylab("condition")+
  xlab("Treatment effect on discrete emotions")+
  theme(legend.title=element_blank())

################################################################################
# Appendix D.4 Mediation analysis
################################################################################
# data preparation for Table D4 
# Covariates string
covariates_str <- paste(
  "Age", "Gender", "Education", "Ethnicity", "MonthlyIncome", "CCPMembership",
  "rural", "OverseaExperience_yes", "nationalism_nor", "liberalism_nor", "vpnsavvy",
  "conservation_nor", "openness_nor", "conscientiousness_nor", "neuroticism_nor",
  "extraversion_nor", "agreeableness_nor", "LifeSat_1",
  sep = " + "
)

# Build formulas OUTSIDE a function
form_med_positive_a <- as.formula(
  paste("positive_emo_rv ~ condition +", covariates_str)
)
form_med_positive_bc_fic <- as.formula(
  paste("general_attitude_fic ~ condition + positive_emo_rv +", covariates_str)
)
form_med_positive_bc_real <- as.formula(
  paste("general_attitude_real ~ condition + positive_emo_rv +", covariates_str)
)
form_med_negative_a <- as.formula(
  paste("negative_emo ~ condition +", covariates_str)
)
form_med_negative_bc_fic <- as.formula(
  paste("general_attitude_fic ~ condition + negative_emo +", covariates_str)
)
form_med_negative_bc_real <- as.formula(
  paste("general_attitude_real ~ condition + negative_emo +", covariates_str)
)

# Fit models directly
med_positive_a <- lm(form_med_positive_a, data = data)
med_positive_bc_fic <- lm(form_med_positive_bc_fic, data = data)
med_positive_bc_real <- lm(form_med_positive_bc_real, data = data)
med_negative_a <- lm(form_med_negative_a, data = data)
med_negative_bc_fic <- lm(form_med_negative_bc_fic, data = data)
med_negative_bc_real <- lm(form_med_negative_bc_real, data = data)

## store all 8 results of positive - real/fic 
unique(data$condition) 
treatment_arms <- setdiff(unique(data$condition), "control")
# Mediator models: M ~ condition + covariates
mediator_models <- list(
  positive_emo_rv = med_positive_a,
  negative_emo    = med_negative_a
)
# Outcome models: Y ~ condition + M + covariates
# Nested by outcome then mediator (so we can pick the correct Y-model for each M)
outcome_models <- list(
  fic  = list(
    positive_emo_rv = med_positive_bc_fic,
    negative_emo    = med_negative_bc_fic
  ),
  real = list(
    positive_emo_rv = med_positive_bc_real,
    negative_emo    = med_negative_bc_real
  )
)

# Function to run all combinations of mediation analyses
run_all_mediations <- function(med_models, out_models, arms,
                               treat_var = "condition",
                               control_value = "control",
                               boot = TRUE, sims = 1000) {
  
  if (!length(med_models)) stop("No mediator models supplied.")
  if (!length(out_models)) stop("No outcome models supplied.")
  if (!length(arms)) stop("No treatment arms supplied.")
  
  res <- list()
  
  for (med_name in names(med_models)) {
    model_m <- med_models[[med_name]]
    
    for (out_name in names(out_models)) {
      # outcome model must match this mediator
      model_y <- out_models[[out_name]][[med_name]]
      if (is.null(model_y)) {
        stop(sprintf("Missing outcome model for outcome='%s' & mediator='%s'.",
                     out_name, med_name))
      }
      
      for (arm in arms) {
        key <- paste("results", med_name, out_name, arm, sep = "_")
        res[[key]] <- mediation::mediate(
          model.m = model_m,
          model.y = model_y,
          treat   = treat_var,
          mediator = med_name,          # name of M variable in your data
          control.value = control_value,
          treat.value   = arm,
          boot = boot,
          sims = sims                   # note: sims (plural)
        )
      }
    }
  }
  res
}
# Run all mediations, takes around 15-20 minutates
results <- run_all_mediations(
  med_models = mediator_models,
  out_models = outcome_models,
  arms       = treatment_arms
)

summary(results$results_positive_emo_rv_fic_PublicSurv)
names(results$results_positive_emo_rv_fic_PublicSurv)

# Function to extract estimates and confidence intervals from mediate object
extract_results <- function(med_res, label) {
  data.frame(
    Estimate = c(med_res$d0, med_res$z0, med_res$tau.coef, med_res$n0),
    LowerCI = c(med_res$d0.ci[1], med_res$z0.ci[1], med_res$tau.ci[1], med_res$n0.ci[1]),
    UpperCI = c(med_res$d0.ci[2], med_res$z0.ci[2], med_res$tau.ci[2], med_res$n0.ci[2]),
    Effect = factor(c("ACME", "ADE", "Total Effect","% Mediated"), levels = c("ACME", "ADE", "Total Effect","% Mediated")),
    Condition = label
  )
}

# Labels for each result set
labels <- paste("Result", 1:8)
# Extract results for each mediate object
results_list1 <-mapply(extract_results, results, labels, SIMPLIFY = FALSE)

# Combine all results into a single data frame
results_df1 <- bind_rows(results_list1)
# change the value label of Condition into treatment groups 
results_df1 <- results_df1 %>%
  mutate(Condition = case_when(
    Condition == "Result 1" ~ "Public Surv",
    Condition == "Result 2" ~ "Public Both",
    Condition == "Result 3" ~ "Personal. Surv & Public Censor",
    Condition == "Result 4" ~ "Personal. Censor & Public Surv",
    Condition == "Result 5" ~ "Personal. Both",
    Condition == "Result 6" ~ "Public Censor",
    Condition == "Result 7" ~ "Personal. Censor",
    Condition == "Result 8" ~ "Personal. Surv",
    TRUE ~ Condition  # Keeps the original value if no condition matches
  ))
# add new column to indicate mediator and outcome

# Create the new column values
mediator_outcome <- rep(c("positive_fic", "positive_real", "negative_fic", "negative_real"), each = 32)

# Add the new column
results_df1$Category <- mediator_outcome

### plotting Table D4 mediating effect of emotion on attidues (corresponding to Figure 3 in the main text)
tab_d4_df <- results_df1 %>%
  mutate(
    formatted = case_when(
      Effect == "% Mediated" ~ as.character(round(Estimate, 3)),
      TRUE ~ paste0(
        round(Estimate, 3), "\n[",
        round(LowerCI, 3), ", ",
        round(UpperCI, 3), "]"
      )
    )
  )

# Ensure clean column for pivoting (Condition only, without Category)
tab_d4_df <- tab_d4_df %>%
  mutate(
    Category = str_to_title(str_replace_all(Category, "_", " ")),  # e.g., positive_fic → Positive Fic
    Condition = str_trim(Condition)  # remove accidental spaces
  )

# Nest data by Category
nested_tables <- tab_d4_df %>%
  group_by(Category) %>%
  group_nest()

# Pivot each nested table to wide format
tables_by_category <- nested_tables %>%
  mutate(
    data = map(data, ~ .x %>%
                 dplyr::select(Effect, Condition, formatted) %>%
                 distinct() %>%
                 pivot_wider(
                   names_from = Condition,
                   values_from = formatted
                 ) %>%
                 arrange(factor(Effect, levels = c("ACME", "ADE", "Total Effect", "% Mediated")))
    )
  )

# Create a named list of tables
named_tables <- set_names(tables_by_category$data, tables_by_category$Category)
named_tables[["Positive Fic"]]
named_tables[["Negative Fic"]]

#combine subtables into one table
tab_d4 <- imap_dfr(
    named_tables,
    ~ mutate(.x, Category = .y, .before = 1)
  )
# Table D4
View(tab_d4)



# Table_D4. Mediation effects of emotions on public attitudes
tab_d4 %>%
  kbl(align = "l", booktabs = TRUE, linesep = "") %>%
  kable_styling(full_width = FALSE) %>%
  group_rows("Negative Fic", 1, 4) %>%
  group_rows("Negative Real", 5, 8) %>%
  group_rows("Positive Fic", 9, 12) %>%
  group_rows("Positive Real", 13, 16)

################################################################################
# Appendix D.5 Treatment effect heterogeneity
# Note: using multi-arm/multi-outcome causal forest 
################################################################################

set.seed(2023)
# set treatment as factor
data <- data %>% mutate(condition_fac = factor(condition))
W <- data$condition_fac

#choose covariates ONCE (keep names consistent with your data)
covariate_names <- c(
  "Age","Gender","Education","Ethnicity","MonthlyIncome","CCPMembership",
  "rural","OverseaExperience_yes","nationalism_nor","liberalism_nor","vpnsavvy","MediaUse.I",
  "conservation_nor","openness_nor","conscientiousness_nor","neuroticism_nor","extraversion_nor",
  "agreeableness_nor","LifeSat_1"
)

# Build model matrix (automatic dummies for factors; no intercept)
X <- model.matrix(~ . - 1, data = data[, covariate_names, drop = FALSE])

# Outcomes to iterate over 
# Replace the placeholders with your five outcomes
outcomes <- c(
  "positive_emo_rv",     # <- example from your code
  "negative_emo",
  "neutral_emo_rv",
  "general_attitude_fic",
  "general_attitude_real"
)

# Function to fit causal forest, predict and summarize 
fit_cf <- function(Y_name, X, W, num.trees = 4000, sample.fraction = 0.5) {
  Y <- data[[Y_name]]
  cf <- multi_arm_causal_forest(
    X = X, Y = Y, W = W,
    num.trees = num.trees, sample.fraction = sample.fraction, seed = 2023
  )
  # Out-of-bag predictions & (optional) variances
  pred <- predict(cf, drop = TRUE, estimate.variance = TRUE)
  pred_df <- as.data.frame(pred$predictions)
  var_est <- pred$variance.estimates  # matrix for multi-arm; keep as-is
  # ATEs (AIPW): returns a data.frame with columns 'estimate', 'std.err', 'contrast', 'outcome'
  ate_df <- tibble::as_tibble(average_treatment_effect(cf, target.sample = "all", method = "AIPW"))
  # Variable importance (numeric named vector)
  vi <- variable_importance(cf)
  list(
    outcome             = Y_name,
    forest              = cf,
    predictions_df      = pred_df,     # CATEs per unit x contrast
    pred_variance       = var_est,     # keep raw; shape can be vector or matrix
    ate_tbl             = ate_df,      # <-- use 'estimate' column instead of trying to coerce
    variable_importance = vi
  )
}
# Run for all outcomes 
results_hte <- outcomes %>%
  set_names() %>%
  map(~ fit_cf(.x, X = X, W = W))

# Extract variable importance table
outcomes <- names(results_hte)
covariate_names

# build a data frame: rows = covariates, cols = outcomes
vi_table <- tibble(variable = covariate_names)

for (oc in outcomes) {
  vimp <- results_hte[[oc]][["variable_importance"]]
  vi_table[[oc]] <- round(as.numeric(vimp), 3)   # round to 3 decimals
}
# Table D5 
tab_d5<-vi_table
view(tab_d5)

# Figure D5.1-5.5 HTE of six sociodemographic factors of all five outcomes
make_long_cf <- function(res, data, outcome_name) {
  # combine predictions with covariates
  cf_long <- res$predictions_df %>%
    bind_cols(data) %>%
    pivot_longer(
      cols = colnames(res$predictions_df),  # pivot all prediction columns
      names_to = "type",
      values_to = "value"
    )
  
  # Optionally recode names (adjust to your actual column names)
  cf_long$type <- recode(cf_long$type,
                         "Personalboth - control"             = "Personal both",
                         "PublicBoth - control"               = "Public both",
                         "PersonalCensor - control"           = "Personal Censor",
                         "PersonalSurv - control"             = "Personal Surveillance",
                         "PersonalCensorPublicSurv - control" = "Personal Censor +\nPublic Surveillance",
                         "PersonalSurvPublicCensor - control" = "Personal Surveillance +\nPublic Censor",
                         "PublicCensor - control"             = "Public Censor",
                         "PublicSurv - control"               = "Public Surveillance",
                         .default = cf_long$type   # keep others unchanged
  )
  
  cf_long$outcome <- outcome_name
  return(cf_long)
}
# Apply to all outcomes and combine into a named list
cf_long_list <- map(names(results_hte), function(outcome) {
  make_long_cf(results_hte[[outcome]], data, outcome)
}) %>%
  set_names(names(results_hte))

# plot hte 

plot_hte <- function(cf_long, covariate, method = "loess") {
  xlab <- if (covariate == "MediaUse.I") "foreign source consumption" else covariate
  ggplot(cf_long, aes(x = .data[[covariate]], y = value, color = type)) +
    stat_smooth(method = method, se = FALSE) +
    labs(
      y = "Estimated CATE",
      x = xlab,
      color = "Condition"
    ) +
    theme_bw()
}

# selected relevant covariate to plot 
covariates_to_plot <- c( "Education", "Age", "CCPMembership",
                        "OverseaExperience_yes", "vpnsavvy", "MediaUse.I")

# for combining plots

plots_all <- list()

for (outcome in names(cf_long_list)) {
  cf_long <- cf_long_list[[outcome]]
  outcome_plots <- map(covariates_to_plot, ~ plot_hte(cf_long, .x))
  combo <- wrap_plots(outcome_plots, ncol = 3) +
    plot_layout(guides = "collect") & theme(legend.position = "bottom")
  plots_all[[outcome]] <- combo
}

# Figure D5.1 HTE on positive emotions across six selected sociodemographic factors
plots_all$positive_emo_rv
# Figure D5.2 HTE on negative emotions across six selected sociodemographic factors
plots_all$negative_emo
# Figure D5.3 HTE on neutral emotions across six selected sociodemographic factors
plots_all$neutral_emo_rv
# Figure D5.4 HTE on attitudes towards fictional scenario across six selected sociodemographic factors
plots_all$general_attitude_fic
# Figure D5.5 HTE on attitudes towards real-life scenario across six selected sociodemographic factors
plots_all$general_attitude_real

################################################################################
#                       End of replication code 
################################################################################